# -*- coding: utf-8 -*-
import json

from scrapy import Request

from chinagd.spiders.base import BaseSpider
from abchina.rules import *


class SkuSpider(BaseSpider):
    """Crawl the ABC e-commerce generic-product (SKU) catalog.

    Pages through ``GenProduct/QueryPageList`` per product state: page 1 is
    requested up front, the response reveals the total page count, and the
    remaining pages are scheduled from the first response.
    """

    name = 'sku'
    # Per-spider overrides: throttle to 4 concurrent requests with a delay,
    # since every request hits the same host.
    custom_settings = {
        'CONCURRENT_REQUESTS': 4,
        'DOWNLOAD_DELAY': 0.5,
        'CONCURRENT_REQUESTS_PER_DOMAIN': 4,
        'CONCURRENT_REQUESTS_PER_IP': 4,
    }
    # Catalog list endpoint.
    sku_url = 'https://e.abchina.com/qyjc/site/GenProduct/QueryPageList'

    def __init__(self, batchNo=None, *args, **kwargs):
        """Initialize the spider.

        :param batchNo: crawl batch identifier, forwarded to BaseSpider
                        (exposed there as ``self.batch_no``).
        """
        super(SkuSpider, self).__init__(batchNo, *args, **kwargs)
        # Rows per catalog page; also sent in the request payload.
        self.page_size = 50

    def _sku_request(self, state, page):
        """Build one catalog-list request for (state, page).

        The query payload is serialized into ``meta['params']``; the actual
        encryption/attachment of the payload is presumably handled by a
        downloader middleware keyed on the ``X-EncryptType`` header
        (the Request itself carries no body) — TODO confirm.

        :param state: product state filter sent as ``State``.
        :param page:  1-based page number sent as ``CurPage``.
        :return: a scrapy.Request whose callback is :meth:`parse_sku`.
        """
        params = json.dumps({
            "ProductName": "",
            "State": state,
            "SourceId": "",
            "Category": "",
            "LowestPrice": "",
            "HighestPrice": "",
            "ProductEXCode": "",
            "InAvailableCategorys": "",
            "PageSize": self.page_size,
            "CurPage": page,
            "InterfaceName": "GenProduct/QueryPageList"
        })
        return Request(
            method='GET',
            url=self.sku_url,
            callback=self.parse_sku,
            headers={
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36',
                'X-EncryptType': 'encrypt',
            },
            meta={
                'reqType': 'catalog',
                'batchNo': self.batch_no,
                'params': params,
                'page': page,
                'state': state,
            },
            errback=self.error_back,
            priority=100,
            # Identical URL for every page (payload lives in meta), so the
            # dupe filter must be bypassed.
            dont_filter=True
        )

    def start_requests(self):
        """Seed the crawl with page 1 of each product state."""
        state_list = [1]
        for state in state_list:
            yield self._sku_request(state, page=1)

    # Handle one SKU catalog page.
    def parse_sku(self, response):
        """Emit the SKUs of one catalog page; fan out to the remaining pages.

        On page 1 the total page count is parsed from the response and
        requests for pages 2..total are scheduled.
        """
        meta = response.meta
        cur_page = meta.get('page')
        state = meta.get('state')
        # NOTE: this intentionally calls the module-level ``parse_sku``
        # rule from ``abchina.rules`` (the class namespace is not in scope
        # inside a method body), not this method recursively.
        sku_list = parse_sku(response)
        if not sku_list:
            return
        # Emit this page's SKUs.
        self.logger.info('清单: state=%s, page=%s, cnt=%s' % (state, cur_page, len(sku_list)))
        yield Box('sku', self.batch_no, sku_list)

        # Only page 1 knows the total; schedule the rest from here so each
        # later page is requested exactly once.
        if cur_page == 1:
            total_page = parse_total_page(response)
            self.logger.info('页数: state=%s, total=%s' % (state, total_page))
            if total_page > 1:
                for page in range(2, total_page + 1):
                    yield self._sku_request(state, page)


